# This script contains auxiliary functions to run regression models 
# throughout the analysis

# function to run regression models and calculate marginal means and contrasts
# (for the main study only)
reg_run <- function(predictor_list, 
                    dep_var = "story_real",
                    model_name = "",
                    story_set = "1",
                    interaction_terms = "",
                    story_dummies = F,
                    day_dummies = T,
                    first_three_stories = F,
                    no_source_stories = F,
                    quantities_calculate = "contrasts",
                    contrast_var = "state_controlled",
                    contrast_filter = "Critical - (State-controlled)",
                    logit_model = F) {
  
  baseline_vars <- c(dep_var, "session_id", "story_label",
                     "survey_day")
  
  qz <- main_stories
  
  if (!first_three_stories) {
    qz <- qz |>
      # do not use stories for which sources were fixed in the experiment
      filter(!story_code %in% c(101, 102, 103))
  }
  
  if (no_source_stories) {
    # only include responses where no news outlet was assigned as treatment
    qz <- qz |> filter(story_source == "No source")
  }
  
  if (story_set == "1" | story_set == "2") {
    # restrict stories to first or second quiz
    qz <- qz |> 
      filter(set == as.numeric(story_set))
  }
  
  qz <- qz |>
    select(c(baseline_vars, predictor_list)) |>
    drop_na()
  
  # construct regression formula from the list of predictors and interactions
  if (length(predictor_list) > 1) {
    reg_formula <- paste(predictor_list, collapse = " + ")
    reg_formula <- paste0(dep_var, " ~ ", reg_formula)
  } else {
    reg_formula <- paste0(dep_var, " ~ ", predictor_list)
  }
  
  # construct an interaction from the list of interaction terms
  int_formula <- paste(interaction_terms, collapse = "*")
  em_formula <- paste0(" ~ ",
                       paste(interaction_terms, collapse = " | "))
  reg_formula <- paste0(reg_formula, " + ", int_formula) 
  
  if (story_dummies) {
    # add story fixed effects
    reg_formula <- paste0(reg_formula, " + story_label") 
  }
  
  if (day_dummies) {
    # add day fixed effects
    reg_formula <- paste0(reg_formula, " + survey_day")
  }
  
  print(reg_formula)
  
  if (logit_model) {
    
    # for the logit model, do not calculate any additional quantities
    
    glmod <- glm(as.formula(reg_formula), data = qz, family = "binomial")
    return(list(model = glmod))
    
  } else {
    lmod <- lm_robust(as.formula(reg_formula), data = qz, 
                      clusters = session_id,
                      se_type = "CR0")
    
    # save coefficients and errors
    lmod_tidy <- tidy(lmod)
    
    # prepare the emmean object to calculate contrasts or marginal means
    emm <- emmeans(lmod, as.formula(em_formula), rg.limit = 100000)
    
    if (quantities_calculate == "contrasts") {
      
      # calculate contrasts
      emm_cont <- summary(pairs(emm, simple = contrast_var))
      emm_cont <- emm_cont |>
        # select only the specified contrast
        filter(contrast == contrast_filter) |>
        # reverse the sign so that the contrast is 
        # "state media - critical media" or "supporters - critics"
        mutate(estimate = -estimate,
               # add confidence interval
               conf.low = estimate - 1.96*SE,
               conf.high = estimate + 1.96*SE)
      
      return(list(coef_ci = lmod_tidy,
                  model = lmod,
                  effect = emm_cont))
      
      
    } else if (quantities_calculate == "marginal means") {
      
      # calculate marginal means
      emm_cont <- summary(emm) |>
        rename(estimate = emmean,
               conf.low = lower.CL,
               conf.high = upper.CL)
      
      return(list(coef_ci = lmod_tidy,
                  model = lmod,
                  effect = emm_cont))
      
    } else if (quantities_calculate == "model only") {
      
      return(list(model = lmod))
      
    }
    
    
  }
}

# function to examine news source usage/trust in the main/OMI/Levada samples
media_attitudes <- function(dep_var, media_type, study) {
  
  reg_formula <- paste0(dep_var, 
                        " ~ pres_approval_cat + age + female + education")
  
  if (study == "main") {
    usage_model <- lm(as.formula(reg_formula),
                      data = main_resp |>
                        drop_na(pres_approval_cat))
  } else if (study == "omi") {
    usage_model <- lm(as.formula(reg_formula),
                      data = omi_resp |> 
                        mutate(age = age_ordinal,
                               education = education_higher) |>
                        drop_na(pres_approval_cat))
  } else if (study == "levada") {
    usage_model <- lm(as.formula(reg_formula),
                      data = levada_resp |>
                        drop_na(pres_approval_cat),
                      weights = weight)
  }
  
  # calculate marginal means/probabilities depending on presidential approval
  usage_means <- ggpredict(usage_model, 
                           terms = "pres_approval_cat",
                           vcov.fun = "vcovHC",
                           vcov.type = "HC0") |>
    as.data.frame() |>
    mutate(x = factor(x, levels = c("Certainly disapprove", "Somewhat disapprove",
                                    "Somewhat approve", "Certainly approve")),
           media = media_type) |>
    rename(estimate = predicted)
  
  # add heteroskedasticity-robust errors
  usage_vcov <- vcovHC(usage_model, type = "HC0")
  
  return(list(model = usage_model, vcov_hc = usage_vcov, means = usage_means))
  
}

# function to run multinomial models 
# (for the OMI survey, evaluations of state media and their characteristics)
run_multinom <- function(eval_question, reg_formula,
                         given_indep_knowledge = F) {
  
  # split the dependent variable name into criterion (accuracy, bias, etc.)
  # and outlet name
  criterion <- str_split(eval_question, "_")[[1]][2]
  outlet <- str_remove(eval_question, paste0("ranking_", 
                                             criterion, "_"))
  # rename the outlet for figures
  outlet <- case_when(
    outlet == "Russia_24" ~ "Russia-24", 
    outlet == "Echo" ~ "Echo of Moscow",
    outlet == "TV1" ~ "Channel One",
    TRUE ~ outlet)
  
  # calculate marginal means/probabilities of trusting/using specific news 
  # sources
  if (given_indep_knowledge) { 
    
    # conditional means given the knowledge of any independent media 
    
    # control for the knowledge of the state outlet in question
    if (outlet %in% c("RIA", "RT")) {
      outlet_knowledge <- paste0(" + source_known_", outlet)
    } else if (outlet == "Channel One") {
      outlet_knowledge <- " + source_known_TV1"
    } else if (outlet == "Russia-24") {
      outlet_knowledge <- " + source_known_Russia_1_24"
    } else {
      outlet_knowledge <- ""
    }
    reg_formula <- paste0(eval_question, reg_formula, outlet_knowledge)
    
    # run the multinomial model
    mod_obj <- multinom(as.formula(reg_formula), data = omi_rankings_aw)
    
    # calculate marginal means depending on presidential approval and
    # knowledge of indep. media
    mod_probs <- effect("pres_approval_dummy*source_known_independent", 
                        mod_obj)
    # reformat the data frame
    mod_probs <- as.data.frame(mod_probs) |>
      select(pres_approval_dummy, source_known_independent,
             starts_with("prob"),
             starts_with("se.prob")) |>
      # fix variable names
      rename_at(vars(contains("prob")),
                list(~str_replace(., "prob.", "prob_"))) |>
      # reshape to a long format
      pivot_longer(-c(pres_approval_dummy, source_known_independent),
                   names_to = c(".value", "response"),
                   names_sep = "_")
    
    
  } else {
    
    # means for the overall sample
    
    reg_formula <- paste0(eval_question, reg_formula)
    
    # run the multinomial model
    mod_obj <- multinom(as.formula(reg_formula), data = omi_rankings)
    
    # calculate marginal means depending on presidential approval 
    mod_probs <- effect("pres_approval_dummy", mod_obj)
    # reformat the data frame
    mod_probs <- as.data.frame(mod_probs) |>
      select(pres_approval_dummy,
             starts_with("prob"),
             starts_with("se.prob")) |>
      # fix variable names
      rename_at(vars(contains("prob")),
                list(~str_replace(., "prob.", "prob_"))) |>
      # reshape to a long format
      pivot_longer(-pres_approval_dummy,
                   names_to = c(".value", "response"),
                   names_sep = "_")
    
  }
  
  # add confidence intervals, dimension (accuracy, bias, etc.), and outlet name
  mod_probs <- mod_probs |>
    mutate(conf.low = prob - 1.96*se.prob,
           conf.high = prob + 1.96*se.prob,
           dimension = criterion,
           source = outlet)
  
  return(list(model = mod_obj, probs = mod_probs))
}

